M2.859 · Visualización de datos · PEC2

Estudios de Informática, Multimedia y Telecomunicación

Máster Ciencia de Datos

 

VISUALIZACIÓN DE DATOS


PEC 2: Estudio de técnicas de visualización de datos



CGD
Mayo 2023



1. Sankey diagram


In [1]:
!pip3 install -U kaleido
import urllib, json
import plotly.graph_objects as go

url = 'https://raw.githubusercontent.com/plotly/plotly.js/master/test/image/mocks/sankey_energy.json'
response = urllib.request.urlopen(url)
data = json.loads(response.read())

fig = go.Figure(data=[go.Sankey(
    valueformat = ".0f",
    valuesuffix = "TWh",
    # Se definen los nodos
    node = dict(
      pad = 15,
      thickness = 20,
      line = dict(color = "black", width = 0.5),
      label =  data['data'][0]['node']['label'],
      color =  data['data'][0]['node']['color']
    ),
    # Add links
    link = dict(
      source =  data['data'][0]['link']['source'],
      target =  data['data'][0]['link']['target'],
      value =  data['data'][0]['link']['value'],
      label =  data['data'][0]['link']['label'],
      color =  data['data'][0]['link']['color']))])

fig.update_layout(title_text="Sankey diagram<br>Agencia de Información de Energía de EEUU", font_size=10)
fig.show()


# Se guarda como archivo en local
fig.write_image("sankey_diagram.png")

# Guardar figura en archivo HTML
fig.write_html("sankey_diagram.html")
Requirement already satisfied: kaleido in /Users/cgd/opt/anaconda3/envs/visualization/lib/python3.9/site-packages (0.2.1)

2. Matrix Correlation


In [2]:
import plotly.graph_objects as go
import pandas as pd

facies_data = pd.read_csv('/Users/cgd/* VD/PEC2/facies_data.csv')
facies_data
Out[2]:
Facies Formation Well Name Depth GR ILD_log10 DeltaPHI PHIND PE NM_M RELPOS
0 3 A1 SH SHRIMPLIN 2793.0 77.450 0.664 9.900 11.915 4.600 1 1.000
1 3 A1 SH SHRIMPLIN 2793.5 78.260 0.661 14.200 12.565 4.100 1 0.979
2 3 A1 SH SHRIMPLIN 2794.0 79.050 0.658 14.800 13.050 3.600 1 0.957
3 3 A1 SH SHRIMPLIN 2794.5 86.100 0.655 13.900 13.115 3.500 1 0.936
4 3 A1 SH SHRIMPLIN 2795.0 74.580 0.647 13.500 13.300 3.400 1 0.915
... ... ... ... ... ... ... ... ... ... ... ...
3227 5 C LM CHURCHMAN BIBLE 3120.5 46.719 0.947 1.828 7.254 3.617 2 0.685
3228 5 C LM CHURCHMAN BIBLE 3121.0 44.563 0.953 2.241 8.013 3.344 2 0.677
3229 5 C LM CHURCHMAN BIBLE 3121.5 49.719 0.964 2.925 8.013 3.190 2 0.669
3230 5 C LM CHURCHMAN BIBLE 3122.0 51.469 0.965 3.083 7.708 3.152 2 0.661
3231 5 C LM CHURCHMAN BIBLE 3122.5 50.031 0.970 2.609 6.668 3.295 2 0.653

3232 rows × 11 columns

In [3]:
corr_matrix = round(facies_data.corr(), 4)
corr_matrix
/var/folders/v4/74xgv0954936fxrjdx6yw9380000gp/T/ipykernel_11042/3787482836.py:1: FutureWarning:

The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.

Out[3]:
Facies Depth GR ILD_log10 DeltaPHI PHIND PE NM_M RELPOS
Facies 1.0000 0.3402 -0.3443 0.3945 -0.2341 -0.3559 0.7042 0.8547 0.0685
Depth 0.3402 1.0000 -0.0641 0.1780 -0.0914 -0.0744 0.2776 0.2971 0.0014
GR -0.3443 -0.0641 1.0000 -0.1556 0.1900 0.2483 -0.2890 -0.2811 -0.1735
ILD_log10 0.3945 0.1780 -0.1556 1.0000 -0.1176 -0.5229 0.3846 0.5193 0.0880
DeltaPHI -0.2341 -0.0914 0.1900 -0.1176 1.0000 -0.2502 0.0115 -0.1740 0.0366
PHIND -0.3559 -0.0744 0.2483 -0.5229 -0.2502 1.0000 -0.5734 -0.4884 -0.0345
PE 0.7042 0.2776 -0.2890 0.3846 0.0115 -0.5734 1.0000 0.6571 0.0189
NM_M 0.8547 0.2971 -0.2811 0.5193 -0.1740 -0.4884 0.6571 1.0000 0.0366
RELPOS 0.0685 0.0014 -0.1735 0.0880 0.0366 -0.0345 0.0189 0.0366 1.0000
In [4]:
import seaborn as sns
import matplotlib.pyplot as plt
import mpld3

# Se crea matriz de correlación usando Seaborn
heatmap = sns.heatmap(corr_matrix, annot=True, cmap='magma')
plt.title('Correlation Matrix', fontsize=15, color= 'navy', fontweight='bold')
plt.show(heatmap)

# Se obtiene figura
fig_cor = heatmap.get_figure()

# Se guarda como archivo en local
fig_cor.savefig("correlation_matrix.png")
d3fig = mpld3.fig_to_html(fig_cor)

# Guardar figura en archivo HTML
with open('correlation_matrix.html', 'w') as f:
    f.write(d3fig)

3. Raincloud plot


In [5]:
%matplotlib inline
import pandas as pd

# Se cargan los datos
csv_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_df = pd.read_csv(csv_url, header = None)
col_names = ['Sepal_Length','Sepal_Width','Petal_Length','Petal_Width','Species']

# Se guardan los datos como pandas dataframe
iris_df =  pd.read_csv(csv_url, names = col_names)
iris_df.head()
Out[5]:
Sepal_Length Sepal_Width Petal_Length Petal_Width Species
0 5.1 3.5 1.4 0.2 Iris-setosa
1 4.9 3.0 1.4 0.2 Iris-setosa
2 4.7 3.2 1.3 0.2 Iris-setosa
3 4.6 3.1 1.5 0.2 Iris-setosa
4 5.0 3.6 1.4 0.2 Iris-setosa
In [7]:
import seaborn as sns
import matplotlib.pyplot as plt
from ptitprince import half_violinplot

iris = sns.load_dataset('iris')

dy = "species"
dx = "sepal_length"
ort = "h"
pal = "Set2"

# Se grafica 
fig, ax = plt.subplots(figsize=(7, 5))
ax = half_violinplot(x=dx, y=dy, data=iris, palette=pal, bw=.2, cut=0.,
                     scale="area", width=.6, inner=None, orient=ort)
ax = sns.stripplot(x=dx, y=dy, data=iris, palette=pal, edgecolor="white",
                   size=3, jitter=1, zorder=0, orient=ort)
ax = sns.boxplot(x=dx, y=dy, data=iris, color="black", width=.15, zorder=10,
                 showcaps=True, boxprops={'facecolor': 'none', "zorder": 10},
                 showfliers=True, whiskerprops={'linewidth': 2, "zorder": 10},
                 saturation=1, orient=ort)
plt.title('Raincloud plot con Iris Species dataset', fontsize=15, color= 'navy', fontweight='bold')
plt.show()

# Se obtiene figura
fig_iris = fig.get_figure()

# Se guarda como archivo en local
fig_iris.savefig("Raincloud.png") # Convertir figura en formato HTML
d3fig = mpld3.fig_to_html(fig_iris)

# Guardar figura en archivo HTML
with open('raincloud.html', 'w') as f:
    f.write(d3fig)